set more off

*This code primarily serves to compile the raw incident-level geocoded BFRS data
*into a day-tehsil-level dataset counting the number of incidents and resultant
*casualties that occurred in a given tehsil on a given day according to the BFRS
*dataset. It also processes some other datasets containing information on
*relevant covariates and merges them with the final day-tehsil-level dataset.



*** Main Data Processing ***

//Load the raw incident-level BFRS file
import delimited "$rawData/cy.csv", clear

//Every row is one incident; a constant 1 turns later sums into incident counts
generate incidents = 1

//An incident is dated by the day it started, even when it ran over several days
generate date = daystart

//Casualty measures. Creation order is kept as incidents, date, inj, totDead,
//totCas so the dataset's column order is unchanged.
//Note: "+" yields missing when either operand is missing.
generate inj = numberinjured
generate totDead = numberkilled
generate totCas = totDead + inj

//Remove event types known not to be insurgent violence
drop if inlist(event, "assassination (drone attack)", ///
	"military/paramilitary/police attack on non-state combatants", ///
	"military/paramilitary/police selective violence")

//Remove incidents attributed to non-insurgent parties (two calls keep each
//inlist() within Stata's limit on string arguments)
drop if inlist(partyresponsible, "foreign party (afghanistan)", ///
	"student group", "religious party", "military/paramilitary", "police")
drop if inlist(partyresponsible, "foreign party (united states)", ///
	"intelligence agency", "foreign party (multilateral)", ///
	"professional union/alliance", ///
	"civil society group/campaign group", "political party")

//Swap the BFRS location fields for the geocode-based ones
drop tehsil district
rename adm3 tehsil
rename adm2 district

//Keep only the variables needed downstream
keep tehsil district incidents date inj totDead totCas

//construct placeholder data, used later to fill in days with no incidents
//One zero-count row per tehsil, dated 15706 (01jan2003 as a Stata daily date).
//-input- without a varlist appends observations and fills the variables
//positionally in dataset order, so the column order is pinned explicitly here
//rather than relying on the column order of the raw file. Each row below reads:
//  tehsil  district  incidents  date  inj  totDead  totCas
order tehsil district incidents date inj totDead totCas
input
"Ambar Utman Khel" "Mohmand Agency" 0 15706 0 0 0
"Bar Chamarkand" "Bajaur Agency" 0 15706 0 0 0
"Bara" "Khyber Agency" 0 15706 0 0 0
"Barang" "Bajaur Agency" 0 15706 0 0 0
"Birmal" "South Waziristan Agency" 0 15706 0 0 0
"Central Kurram" "Kurram Agency" 0 15706 0 0 0
"Central Orakzai" "Orakzai Agency" 0 15706 0 0 0
"Data Khel" "North Waziristan Agency" 0 15706 0 0 0
"Dossali" "North Waziristan Agency" 0 15706 0 0 0
"Garyum" "North Waziristan Agency" 0 15706 0 0 0
"Ghulam Khan" "North Waziristan Agency" 0 15706 0 0 0
"Halimzai" "Mohmand Agency" 0 15706 0 0 0
"Ismailzai" "Orakzai Agency" 0 15706 0 0 0
"Jamrud" "Khyber Agency" 0 15706 0 0 0
"Khar" "Bajaur Agency" 0 15706 0 0 0
"Ladha" "South Waziristan Agency" 0 15706 0 0 0
"Landi Kotal" "Khyber Agency" 0 15706 0 0 0
"Lower Kurram" "Kurram Agency" 0 15706 0 0 0
"Lower Orakzai" "Orakzai Agency" 0 15706 0 0 0
"Makin" "South Waziristan Agency" 0 15706 0 0 0
"Mamund" "Bajaur Agency" 0 15706 0 0 0
"Mir Ali" "North Waziristan Agency" 0 15706 0 0 0
"Miran Shah" "North Waziristan Agency" 0 15706 0 0 0
"Mula Ghori" "Khyber Agency" 0 15706 0 0 0
"Nawagai" "Bajaur Agency" 0 15706 0 0 0
"Pindiali" "Mohmand Agency" 0 15706 0 0 0
"Prang Ghar" "Mohmand Agency" 0 15706 0 0 0
"Razmak" "North Waziristan Agency" 0 15706 0 0 0
"Safi" "Mohmand Agency" 0 15706 0 0 0
"Salarzai Tehsil" "Bajaur Agency" 0 15706 0 0 0
"Saraogha" "South Waziristan Agency" 0 15706 0 0 0
"Serwekai" "South Waziristan Agency" 0 15706 0 0 0
"Shewa" "North Waziristan Agency" 0 15706 0 0 0
"Spinwam" "North Waziristan Agency" 0 15706 0 0 0
"Tiarza" "South Waziristan Agency" 0 15706 0 0 0
"Toi Khulla" "South Waziristan Agency" 0 15706 0 0 0
"Upper Kurram" "Kurram Agency" 0 15706 0 0 0
"Upper Momand" "Mohmand Agency" 0 15706 0 0 0
"Upper Orakzai" "Orakzai Agency" 0 15706 0 0 0
"Utman Khel Tehsil" "Bajaur Agency" 0 15706 0 0 0
"Wana" "South Waziristan Agency" 0 15706 0 0 0
"Yaka Ghund" "Mohmand Agency" 0 15706 0 0 0
end

//Normalise tehsil names to lower case so spellings group together
replace tehsil = lower(tehsil)

//Aggregate incidents and casualties to one row per day-district-tehsil
collapse (sum) totCas inj totDead incidents, by(date district tehsil)

//Build concise district labels from the full agency names
generate dist = ""
local shortNames nwa kurram bajaur khyber mohmand orakzai swa
local k = 0
foreach agency in "North Waziristan Agency" "Kurram Agency" ///
	"Bajaur Agency" "Khyber Agency" "Mohmand Agency" ///
	"Orakzai Agency" "South Waziristan Agency" {
	local ++k
	local short : word `k' of `shortNames'
	replace dist = "`short'" if district=="`agency'"
}
drop district

//Rows whose district matched none of the seven agencies have no label;
//remove them (this discards incidents that could not be geocoded)
keep if dist != ""

//Numeric district and tehsil identifiers
encode dist, generate(distn)
encode tehsil, generate(tehsiln)

//Declare the tehsil-by-day panel and add a row for every tehsil-day that has
//no observation, so days without violence are present in the data
tsset tehsiln date
tsfill, full

//Calendar year and half-year of each observation
generate year = year(date)
generate hyear = halfyear(date)

//Restrict the panel to 2003 onwards
keep if year >= 2003

//Quarter, month and week-of-year, each derived from the daily date
foreach unit in quarter month week {
	generate `unit' = `unit'(date)
}

//ensure consistent tehsil labels
//Rows added by tsfill have tehsiln but no tehsil string. The name is recovered
//from the value label that -encode- attached to tehsiln, rather than from a
//hard-coded code-to-name table, so the labels stay correct whatever codes
//-encode- assigned.
tempvar tehName
decode tehsiln, generate(`tehName')
replace tehsil = `tehName'
drop `tehName'

//ensure consistent district labelling
//Assign the district code from the tehsil name. Codes 1-7 follow the
//alphabetical order of the short district labels (bajaur, khyber, kurram,
//mohmand, nwa, orakzai, swa), which is the order -encode- uses.
replace distn = 1 if inlist(tehsil, "bar chamarkand", "barang", "khar", ///
	"mamund", "nawagai", "salarzai tehsil", "utman khel tehsil")
replace distn = 2 if inlist(tehsil, "bara", "jamrud", "landi kotal", ///
	"mula ghori")
replace distn = 3 if inlist(tehsil, "central kurram", "lower kurram", ///
	"upper kurram")
replace distn = 4 if inlist(tehsil, "ambar utman khel", "halimzai", ///
	"pindiali", "prang ghar", "safi", "upper momand", "yaka ghund")
//North Waziristan is split across two calls to stay within the limit on
//string arguments to inlist()
replace distn = 5 if inlist(tehsil, "data khel", "dossali", "garyum", ///
	"ghulam khan", "mir ali")
replace distn = 5 if inlist(tehsil, "miran shah", "razmak", "shewa", ///
	"spinwam")
replace distn = 6 if inlist(tehsil, "central orakzai", "ismailzai", ///
	"lower orakzai", "upper orakzai")
replace distn = 7 if inlist(tehsil, "birmal", "ladha", "makin", ///
	"saraogha", "serwekai", "tiarza", "toi khulla", "wana")

//Rebuild the short district string from the numeric code so filled rows
//carry it too
replace dist = "bajaur" if distn==1
replace dist = "khyber" if distn==2
replace dist = "kurram" if distn==3
replace dist = "mohmand" if distn==4
replace dist = "nwa" if distn==5
replace dist = "orakzai" if distn==6
replace dist = "swa" if distn==7

g nwa = (dist=="nwa") //NWA dummy variable

//year-specific week, month, quarter, and half-year labels
g monyr = ym(year, month)
g qtryr= yq(year, quarter)
g yhy = yh(year,hyear)
g wky = yw(year,week)

//generate month-year, district, and tehsil dummy variables
tab monyr, g(my)
tab dist, g(dist)
tab tehsil, g(teh)

//generate treatment indicators
//yh(2007,2) equals 95, so treat switches on from the first half of 2008
g treat = (yhy>95)
g trtXnwa = treat*nwa

//dummy for any incident occurring in a tehsil-day
//incidents is still missing on rows added by tsfill at this point, and Stata
//treats missing as larger than any number, so missing must be excluded
//explicitly; otherwise every no-incident day would be flagged as 1.
g inc = (incidents>0 & !missing(incidents))

//time trend variables
//days elapsed since 01jan2003 (Stata daily date 15706)
g timeTrend = date - 15706
//district-specific
forv i = 1/7 {
	g d`i'XtiTr = dist`i'*timeTrend
}
//tehsil-specific
forv i = 1/42 {
	g teh`i'XtiTr = teh`i'*timeTrend
}

//label numeric district variable
label define distn 1 "Bajaur Agency" 2 "Khyber Agency" 3 "Kurram Agency" 4 "Mohmand Agency" 5 "NWA" 6 "Orakzai" 7 "SWA", replace

//saves violence dataset, without additional covariates
save "$data/incFinalBFRStehsil", replace


*** Add Drones Strike Variables ***

//import strike-level drone strike data
import delimited "$rawData/drone_bij_tehsil.csv", clear

//convert the day-month-year string date to a Stata daily date, consistent
//with the violence data
g date2 = date(date, "DMY")
drop date
rename date2 date

g strikes=1 //one row per strike; summed below into a count

//aggregate strikes by tehsil and day
collapse (sum) strikes, by(tehsil date)

//merge with violence data
//Both sides hold at most one row per tehsil-day (the strikes were just
//collapsed by tehsil and date; the violence panel is one row per tehsil-day),
//so this is a 1:1 merge. Unlike m:m, 1:1 stops with an error if either side
//has duplicate keys instead of silently pairing rows by position.
merge 1:1 tehsil date using "$data/incFinalBFRStehsil"

//label days without strikes as such, and zero-fill the violence counts on
//tehsil-days with no recorded incident
foreach v of varlist strikes totCas inj totDead incidents {
	replace `v' = 0 if `v'==.
}

//drop observations occurring after 2011
//Missing compares greater than any number, so strike rows that matched no
//tehsil-day in the violence panel (year missing) are removed here as well.
drop if year>2011

drop _merge

save "$data/incFinalBFRStehsil", replace



*** Add Military Operations, Obama Review and Haqqani Peace Deal Variables ***

//Load the month-year-tehsil military operations file
import delimited "$rawData/mo_tehsilmonyear.csv", clear

//Lower-case the tehsil name and turn it into a numeric identifier
//NOTE(review): tehsiln here comes from a separate -encode- of this file's
//names; the merge below presumably relies on it yielding the same codes as
//the violence data -- confirm that both files list the same set of tehsils.
rename data_fatatehsil tehsil
replace tehsil = lower(tehsil)
encode tehsil, generate(tehsiln)

//Convert three-letter month abbreviations to month numbers 1-12
generate month2 = .
local m = 0
foreach abbr in Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec {
	local ++m
	replace month2 = `m' if month=="`abbr'"
}
drop month tehsil tehsilmoyear data_fatadistrict
rename month2 month

//Keep January 2003 through November 2011 only
drop if year<2003 | year>2011 | (month==12 & year==2011)

//Monthly date used as a merge key
generate monyr = ym(year, month)

//Store the prepared covariates for the merge
save "$data/cov", replace

//Attach the covariates to every tehsil-day in the matching tehsil-month
use "$data/incFinalBFRStehsil", clear
merge m:1 tehsiln monyr using "$data/cov"
drop _merge

//Tehsil-days with no covariate record are coded as 0
foreach v of varlist mo obamareview haqqanipakistan {
	replace `v' = 0 if `v'==.
}
save "$data/incFinalBFRStehsil", replace


*** Add Other Peace Deal Variables ***

//Load the district-level peace deal data
use "$rawData/PD", clear

//Temporary calendar fields, used only to trim the file to
//January 2003 through November 2011
generate month = month(date)
generate year = year(date)
drop if year<2003 | year>2011 | (month==12 & year==2011)
drop month year

//Store the trimmed file for the merge
save "$data/PD1", replace

//Attach the peace deal data to the violence panel by district and day
use "$data/incFinalBFRStehsil", clear
merge m:1 distn date using "$data/PD1"
drop _merge

//District-days with no peace deal record are coded as 0
replace peace = 0 if peace==.

//Final day-tehsil-level violence data
save "$data/incFinalBFRStehsil", replace

